library(cowplot)
library(Cairo)
library(fixest)
library(scales)
library(gtable)
library(egg)
library(gridExtra)

# Names of the covariates treated as categorical; the plotting helpers below
# convert a column to a factor when its name appears in this vector.
discrete_var <- c(
  "gender",
  "race",
  "dwelling_type",
  "rural_code",
  "vote_file_party",
  "precision_reg_geocode"
)
# Replace an age column with character age-band labels.
#
# df:           data frame that contains the age column.
# age_col_name: name of the column to bin (default "age").
#
# Bands are half-open, [lower, upper): an age of 25 is labelled "25-35", not
# "18-25". Ages below 18 and missing ages become NA. The binned data frame is
# returned invisibly, with the original column overwritten.
bin_age <- function(df, age_col_name = "age") {
  stopifnot(age_col_name %in% names(df))

  age <- sym(age_col_name)

  # case_when() uses the first branch that matches, so once the under-18 case
  # is ruled out each band only needs its upper bound.
  binned <- mutate(
    df,
    !!age := case_when(
      !!age < 18 ~ NA_character_,
      !!age < 25 ~ "18-25",
      !!age < 35 ~ "25-35",
      !!age < 50 ~ "35-50",
      !!age < 70 ~ "50-70",
      !!age >= 70 ~ "70+",
      TRUE ~ NA_character_
    )
  )

  invisible(binned)
}
# Capitalise the first letter of every space-separated word in a string.
#
# x: a single string; only the first element of a longer vector is used.
#
# Returns one string with the words re-joined by single spaces. Characters
# after the first letter of each word are left untouched.
simpleCap <- function(x) {
  words <- strsplit(x, " ")[[1]]
  initials <- toupper(substring(words, 1, 1))
  remainders <- substring(words, 2)
  paste(paste0(initials, remainders), collapse = " ")
}
# Convert dot-separated names to CamelCase, e.g. "home.value" -> "HomeValue".
#
# x: character vector of names whose parts are separated by ".".
#
# Returns a character vector the same length as x. vapply() is used so the
# result is always character, including character(0) for zero-length input
# (sapply() would return an empty list there).
camel <- function(x) {
  capitalise <- function(part) {
    paste0(toupper(substring(part, 1, 1)), substring(part, 2, nchar(part)))
  }

  pieces <- strsplit(x, "\\.")
  vapply(
    pieces,
    function(parts) paste(capitalise(parts), collapse = ""),
    character(1)
  )
}

# Fit the block fixed-effects model voted ~ treatment | block_id and return
# the treatment effect with its clustered standard error.
#
# df: data frame with columns voted, treatment, block_id and household_id.
#
# Returns a list with
#   pe:  the coefficient named "treatmenttreatment" (presumably the
#        "treatment" level of the `treatment` variable -- confirm coding),
#   tse: the standard error of that same coefficient, taken from a summary
#        clustered on df$household_id.
get_se_pe <- function(df) {
  treatment_coef <- "treatmenttreatment"

  model_fe <- feols(voted ~ treatment | block_id, data = df)
  clustered <- summary(model_fe, cluster = df$household_id)

  # Select the SE by name so it always lines up with the point estimate, even
  # if the model ever reports more than one coefficient.
  list(
    "pe" = coef(model_fe)[treatment_coef],
    "tse" = se(clustered)[treatment_coef]
  )
}

# Replace a home-value column with character price-band labels.
#
# df:           data frame that contains the home-value column.
# val_col_name: name of the column to bin (default "home_value_raw").
#
# The thresholds and labels imply the raw values are in thousands of dollars.
# Bands are half-open, [lower, upper). Note that the band labelled "500K-700K"
# actually spans 500 up to (but excluding) 750. Missing values become NA. The
# binned data frame is returned invisibly, with the column overwritten.
bin_home_value <- function(df, val_col_name = "home_value_raw") {
  stopifnot(val_col_name %in% names(df))

  home_value <- sym(val_col_name)

  binned <- mutate(
    df,
    !!home_value := case_when(
      !!home_value < 50 ~ "Less than 50K",
      !!home_value >= 50 & !!home_value < 100 ~ "50K-100K",
      !!home_value >= 100 & !!home_value < 150 ~ "100K-150K",
      !!home_value >= 150 & !!home_value < 200 ~ "150K-200K",
      !!home_value >= 200 & !!home_value < 250 ~ "200K-250K",
      !!home_value >= 250 & !!home_value < 300 ~ "250K-300K",
      !!home_value >= 300 & !!home_value < 400 ~ "300K-400K",
      !!home_value >= 400 & !!home_value < 500 ~ "400K-500K",
      !!home_value >= 500 & !!home_value < 750 ~ "500K-700K",
      !!home_value >= 750 & !!home_value < 1000 ~ "750K-1M",
      !!home_value >= 1000 ~ "1M+",
      TRUE ~ NA_character_
    )
  )

  invisible(binned)
}

# Shared builder for plot_real_estate() and plot_real_estate_default().
#
# Bins home_value_raw with bin_home_value(), drops rows without a bin, and for
# every bin computes the relative difference between the treatment and control
# shares, (t_prop - c_prop) / c_prop. Returns the unthemed ggplot: a dashed
# line through the bins with points sized by the control-group count.
#
# df:     data frame with columns home_value_raw and treatment, where
#         treatment takes the values "treatment" and "control".
# breaks: legend breaks for the point-size scale.
# labels: legend labels matching `breaks`.
.home_value_balance_plot <- function(df, breaks, labels) {
  # Must match the labels emitted by bin_home_value(), in display order.
  home_value_levels <- c(
    "Less than 50K", "50K-100K", "100K-150K", "150K-200K", "200K-250K",
    "250K-300K", "300K-400K", "400K-500K", "500K-700K", "750K-1M", "1M+"
  )

  balance <- df %>%
    bin_home_value() %>%
    filter(!is.na(home_value_raw)) %>%
    group_by(treatment) %>%
    count(home_value_raw) %>%
    # Still grouped by treatment here, so `total` is the per-arm row count.
    mutate(total = sum(n), proportion = n / total) %>%
    # The column is named directly; the original passed a symbol object to
    # rename(), which tidyselect does not accept as a selection.
    rename(x = home_value_raw) %>%
    mutate(group_id = "Home Value") %>%
    group_by(x) %>%
    summarize(
      t = total[treatment == "treatment"],
      tc = total[treatment == "control"],
      t_prop = proportion[treatment == "treatment"],
      c_prop = proportion[treatment == "control"],
      diff = (t_prop - c_prop) / c_prop,
      group_id = group_id[1],
      dot_size = n[treatment == "control"]
    ) %>%
    mutate(x = factor(x, levels = home_value_levels, ordered = TRUE))

  ggplot(balance, aes(x = x, y = diff)) +
    geom_line(aes(group = 1), na.rm = TRUE, color = "black",
              linetype = "dashed") +
    labs(size = "Number of registrants\n",
         y = "Relative difference between \ntreatment and control") +
    scale_size_continuous(breaks = breaks, labels = labels) +
    scale_y_continuous(labels = percent, limits = c(-.3, .3)) +
    guides(size = guide_legend(title.position = "top", title.hjust = 0.1)) +
    # No layer maps colour, linetype or shape; these scales are kept only so
    # the returned object is unchanged for callers that add such layers.
    scale_color_manual(values = c("black", "#fc8d62"),
                       name = "Treatment Assignment\n") +
    scale_linetype_manual(values = c("dashed", "solid"),
                          name = "Treatment Assignment\n") +
    scale_shape_manual(values = c(2, 19), name = "Treatment Assignment\n") +
    geom_point(na.rm = TRUE, color = "black", aes(size = dot_size)) +
    facet_wrap(~ group_id, scales = "free", ncol = 5, drop = FALSE)
}

# Covariate-balance plot for binned home values, with the publication theme
# (Times New Roman, large text, legend at the bottom).
#
# df:     data frame with columns home_value_raw and treatment.
# breaks: legend breaks for the point-size scale (default c(100, 500)).
# labels: legend labels matching `breaks`.
#
# Returns a ggplot object.
plot_real_estate <- function(df, breaks = c(100, 500),
                             labels = c("100", "500")) {
  .home_value_balance_plot(df, breaks, labels) +
    theme(
      axis.text.x = element_text(angle = 40, vjust = .7, hjust = .7),
      panel.grid.minor = element_blank(),
      panel.border = element_blank(),
      axis.title.x = element_blank(),
      strip.background = element_rect(fill = "#e0ecf4"),
      panel.grid.major = element_blank(),
      panel.background = element_blank(),
      axis.line = element_line(colour = "black"),
      legend.background = element_blank(),
      legend.position = "bottom",
      legend.key = element_rect(colour = "black", fill = NA),
      legend.key.size = unit(2, "line"),
      plot.title = element_text(size = 44, family = "Times New Roman"),
      text = element_text(size = 36, family = "Times New Roman")
    )
}

# Same plot as plot_real_estate() but with ggplot2's default theme.
#
# Arguments and return value are as for plot_real_estate().
plot_real_estate_default <- function(df, breaks = c(100, 500),
                                     labels = c("100", "500")) {
  .home_value_balance_plot(df, breaks, labels)
}

# Shared builder for plot_real_estate_scatter() and
# plot_real_estate_scatter_default().
#
# Keeps the blocks (block_id) that have at least one non-missing
# home_value_raw in both the treatment and the control arm, averages home
# value per arm within each block, and plots treatment mean against control
# mean with a linear fit. Both axes are limited to [0, 1000].
#
# df: data frame with columns block_id, treatment and home_value_raw.
.home_value_block_scatter <- function(df) {
  block_means <- df %>%
    group_by(block_id) %>%
    filter(any(!is.na(home_value_raw[treatment == "treatment"])) &
             any(!is.na(home_value_raw[treatment == "control"]))) %>%
    summarize(
      home_value_treatment =
        mean(home_value_raw[treatment == "treatment"], na.rm = TRUE),
      home_value_control =
        mean(home_value_raw[treatment == "control"], na.rm = TRUE),
      dif_value = (home_value_treatment - home_value_control)
    )

  # na.rm belongs on the layers: inside aes() it is treated as an (unused)
  # aesthetic and does not silence the "removed rows" warnings for blocks that
  # fall outside the axis limits.
  ggplot(block_means,
         aes(x = home_value_control, y = home_value_treatment)) +
    geom_point(color = "#fc8d59", alpha = .3, size = 2, na.rm = TRUE) +
    xlim(0, 1000) +
    ylim(0, 1000) +
    coord_fixed() +
    geom_smooth(color = "#66c2a5", fill = "#8da0cb", method = "lm",
                na.rm = TRUE) +
    labs(
      x = "Average home value \nof control group \n(thousands of dollars)",
      y = "Average home value \nof treatment group \n(thousands of dollars)"
    )
}

# Block-level scatter of average treatment vs. control home value, with the
# publication theme.
#
# df: data frame with columns block_id, treatment and home_value_raw.
#
# Returns a ggplot object.
plot_real_estate_scatter <- function(df) {
  .home_value_block_scatter(df) +
    theme(
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.background = element_blank(),
      axis.line = element_line(colour = "black"),
      text = element_text(size = 36, family = "Times New Roman"),
      legend.key = element_rect(colour = NA, fill = NA),
      strip.background = element_blank(),
      strip.text = element_blank(),
      panel.spacing = unit(2, "lines")
    )
}

# Same plot as plot_real_estate_scatter() but with ggplot2's default theme.
plot_real_estate_scatter_default <- function(df) {
  .home_value_block_scatter(df)
}

# Recode a vote-method column into descriptive labels.
#
# df:           data frame that contains the vote-method column.
# val_col_name: name of the column to recode (default "voted_2012").
#
# Mapping: "p" -> "In Person"; "e", "a" or "m" -> "Substitution";
# "n" or missing -> "No Vote"; anything else -> "Other".
#
# The missing-value handling now applies to `val_col_name`; previously it was
# hard-coded to the voted_2012 column regardless of the argument. The recoded
# data frame is returned invisibly, with the column overwritten.
bin_voted <- function(df, val_col_name = "voted_2012") {
  stopifnot(val_col_name %in% names(df))

  vote <- sym(val_col_name)

  binned <- df %>%
    mutate(!!vote := case_when(
      !!vote == "p" ~ "In Person",
      !!vote == "e" | !!vote == "a" | !!vote == "m" ~ "Substitution",
      is.na(!!vote) | !!vote == "n" ~ "No Vote",
      TRUE ~ "Other"
    ))

  invisible(binned)
}

# Shared builder for historical_voting_plot() and
# historical_voting_plot_default().
#
# Recodes voted_2012 with bin_voted() and, for every vote method, computes the
# relative difference between the treatment and control shares,
# (t_prop - c_prop) / c_prop. The "Other" category is dropped before plotting.
# Returns the unthemed ggplot: a dashed line through the methods with points
# sized by the control-group count.
#
# df: data frame with columns voted_2012 and treatment, where treatment takes
#     the values "treatment" and "control".
.vote_method_balance_plot <- function(df) {
  balance <- df %>%
    bin_voted() %>%
    filter(!is.na(voted_2012)) %>%
    group_by(treatment) %>%
    count(voted_2012) %>%
    # Still grouped by treatment here, so `total` is the per-arm row count.
    mutate(total = sum(n), proportion = n / total) %>%
    # The column is named directly; the original passed a symbol object to
    # rename(), which tidyselect does not accept as a selection.
    rename(x = voted_2012) %>%
    mutate(group_id = "Vote Method") %>%
    group_by(x) %>%
    summarize(
      t = total[treatment == "treatment"],
      tc = total[treatment == "control"],
      t_prop = proportion[treatment == "treatment"],
      c_prop = proportion[treatment == "control"],
      diff = (t_prop - c_prop) / c_prop,
      group_id = group_id[1],
      dot_size = n[treatment == "control"]
    ) %>%
    mutate(x = factor(
      x,
      levels = c("In Person", "Substitution", "Other", "No Vote"),
      ordered = TRUE
    )) %>%
    filter(x != "Other")

  ggplot(balance, aes(x = x, y = diff)) +
    geom_line(aes(group = 1), na.rm = TRUE, color = "black",
              linetype = "dashed") +
    labs(size = "Number of registrants\n",
         y = "Relative difference between \ntreatment and control") +
    scale_size_continuous(breaks = c(5000, 10000),
                          labels = c("5,000", "10,000")) +
    scale_y_continuous(labels = percent, limits = c(-.055, .055)) +
    guides(size = guide_legend(title.position = "top", title.hjust = 0.1)) +
    # No layer maps colour, linetype or shape; these scales are kept only so
    # the returned object is unchanged for callers that add such layers.
    scale_color_manual(values = c("black", "#fc8d62"),
                       name = "Treatment Assignment\n") +
    scale_linetype_manual(values = c("dashed", "solid"),
                          name = "Treatment Assignment\n") +
    scale_shape_manual(values = c(2, 19), name = "Treatment Assignment\n") +
    geom_point(na.rm = TRUE, color = "black", aes(size = dot_size)) +
    facet_wrap(~ group_id, scales = "free", ncol = 5, drop = FALSE)
}

# Covariate-balance plot for 2012 vote method, with the publication theme
# (Times New Roman, large text, legend at the bottom).
#
# df: data frame with columns voted_2012 and treatment.
#
# Returns a ggplot object.
historical_voting_plot <- function(df) {
  .vote_method_balance_plot(df) +
    theme(
      axis.text.x = element_text(angle = 40, vjust = .7, hjust = .7),
      panel.grid.minor = element_blank(),
      panel.border = element_blank(),
      axis.title.x = element_blank(),
      strip.background = element_rect(fill = "#e0ecf4"),
      panel.grid.major = element_blank(),
      panel.background = element_blank(),
      axis.line = element_line(colour = "black"),
      legend.background = element_blank(),
      legend.position = "bottom",
      legend.key = element_rect(colour = "black", fill = NA),
      legend.key.size = unit(2, "line"),
      strip.text = element_text(size = 36),
      plot.title = element_text(size = 44, family = "Times New Roman"),
      text = element_text(size = 36, family = "Times New Roman")
    )
}


# Same plot as historical_voting_plot() but with ggplot2's default theme.
historical_voting_plot_default <- function(df) {
  .vote_method_balance_plot(df)
}

# Per-state effect on substitution voting in 2016, paired with each state's
# value of `percent_election` from "../states_substition_voting.csv".
#
# df:       data frame with columns voted_2016, state, treatment, block_id and
#           household_id (the last three are used by get_se_pe()).
# analysis: accepted for interface compatibility; not used by this function.
#
# The outcome `voted` is 1 when voted_2016 is "a", "e" or "m", otherwise 0.
# One model is fitted per state with get_se_pe().
#
# Returns a tibble with one row per state: state (upper case), point_estimate,
# positions (2, 3, ... in alphabetical state order), ts_sub (the state's
# percent_election), vtype ("Substitution"), facet_id and se.
#
# NOTE(review): states are matched with state == tolower(state), so the state
# column is presumably stored in lower case -- confirm against the data.
main_effect_vs_historical_voting_rep <- function(df, analysis = "distance") {
  stats <- read_csv("../states_substition_voting.csv")

  vote_type <- "Substitution"
  vote_codes <- c("a", "e", "m")
  facet_label <- "Disaggregated by method"

  # %in% is never NA, so a missing voted_2016 is coded 0 without extra checks.
  to_model <- df %>%
    mutate(voted = ifelse(voted_2016 %in% vote_codes, 1, 0))

  states <- sort(tolower(unique(to_model$state)))

  # Fail with a clear message when a state has no share in the CSV; otherwise
  # the result columns would end up with different lengths. When the CSV lists
  # a state more than once, its first row is used.
  stat_rows <- match(toupper(states), stats$state)
  if (anyNA(stat_rows)) {
    stop(
      "No percent_election entry for state(s): ",
      paste(toupper(states[is.na(stat_rows)]), collapse = ", "),
      call. = FALSE
    )
  }

  # Only the per-state fits are reported, so the pooled model is not fitted.
  fits <- lapply(states, function(astate) {
    get_se_pe(filter(to_model, state == astate))
  })

  tibble(
    state = toupper(states),
    point_estimate = vapply(fits, function(fit) unname(fit$pe), numeric(1)),
    positions = seq_along(states) + 1,
    ts_sub = stats$percent_election[stat_rows],
    vtype = rep(vote_type, length(states)),
    facet_id = rep(facet_label, length(states)),
    se = vapply(fits, function(fit) unname(fit$tse), numeric(1))
  )
}
# Shared builder for plot_historical() and plot_historical_default().
#
# Plots each state's point estimate (with a +/- 1.96 * se error bar and a
# state label) against ts_sub. The data now come from the `to_plot` argument;
# the original read an undefined global named `p1` and ignored its argument.
#
# to_plot:  data frame with columns ts_sub, point_estimate, se and state.
# analysis: "shock" selects the shock y-axis title; any other value selects
#           the distance title.
.historical_effect_plot <- function(to_plot, analysis) {
  ytitle <- "Difference in vote rate between \nfarther and closer  block faces"
  if (analysis == "shock") {
    ytitle <- "Difference in vote rate \n between block face that experienced \nshock that face which did not"
  }

  ggplot(to_plot, aes(x = ts_sub, y = point_estimate, group = state)) +
    geom_errorbar(
      aes(ymin = point_estimate - 1.96 * se,
          ymax = point_estimate + 1.96 * se),
      width = .03,
      color = "#8da0cb"
    ) +
    ylim(-.15, .15) +
    geom_point(color = "#beaed4") +
    geom_text(aes(label = state), hjust = -.5, vjust = .5, size = 12,
              family = "Times New Roman") +
    geom_hline(yintercept = 0, color = "black", linetype = "dotted") +
    # The trailing newline and spaces reproduce the original multi-line label.
    labs(
      x = "Proportion of population that voted\n by substitution in 2012\n    ",
      y = ytitle
    )
}

# Per-state effect vs. historical substitution-voting share, with the
# publication theme.
#
# to_plot:  data frame with columns ts_sub, point_estimate, se and state.
# analysis: "distance" (default) or "shock"; selects the y-axis title.
#
# Returns the ggplot object invisibly, as the original did.
plot_historical <- function(to_plot, analysis = "distance") {
  myplot <- .historical_effect_plot(to_plot, analysis) +
    theme(
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.background = element_blank(),
      axis.line = element_line(colour = "black"),
      text = element_text(size = 36, family = "Times New Roman"),
      title = element_text(size = 36, family = "Times New Roman"),
      axis.text = element_text(size = 44, family = "Times New Roman"),
      axis.title = element_text(size = 44, family = "Times New Roman"),
      legend.title = element_blank(),
      legend.position = "none"
    )
  invisible(myplot)
}

# Same plot as plot_historical() but with ggplot2's default theme.
#
# Returns the ggplot object invisibly, as the original did.
plot_historical_default <- function(to_plot, analysis = "distance") {
  myplot <- .historical_effect_plot(to_plot, analysis)
  invisible(myplot)
}


# Treatment-effect estimates by voting method, pooled and per state.
#
# df: data frame with columns voted_2016, state, treatment, block_id and
#     household_id (the last three are used by get_se_pe()).
#
# For each of "In Person", "Substitution" and "Any Method" the outcome `voted`
# is 1 when voted_2016 is one of that method's codes, otherwise 0.
# get_se_pe() is then run once on the full data (reported as state "ALL",
# position 1) and once per state (positions 2, 3, ... in alphabetical order).
#
# Returns a tibble with columns state (upper case), point_estimate, positions,
# vtype, facet_id and se, with the methods stacked in the order listed above.
#
# NOTE(review): states are matched with state == tolower(state), so the state
# column is presumably stored in lower case -- confirm against the data.
main_effect_rep <- function(df) {
  vote_methods <- list(
    "In Person" = list(codes = c("p"),
                       facet = "Disaggregated by method"),
    "Substitution" = list(codes = c("a", "e", "m"),
                          facet = "Disaggregated by method"),
    "Any Method" = list(codes = c("a", "e", "m", "p"),
                        facet = "Overall")
  )

  per_method <- lapply(names(vote_methods), function(type) {
    method <- vote_methods[[type]]

    # %in% is never NA, so a missing voted_2016 is coded 0 without extra
    # checks.
    model_data <- df %>%
      mutate(voted = ifelse(voted_2016 %in% method$codes, 1, 0))

    states <- sort(tolower(unique(model_data$state)))

    # Pooled fit first, then one fit per state.
    fits <- c(
      list(get_se_pe(model_data)),
      lapply(states, function(astate) {
        get_se_pe(filter(model_data, state == astate))
      })
    )

    tibble(
      state = toupper(c("all", states)),
      point_estimate = vapply(fits, function(fit) unname(fit$pe), numeric(1)),
      positions = as.numeric(seq_along(fits)),
      vtype = rep(type, length(fits)),
      facet_id = rep(method$facet, length(fits)),
      se = vapply(fits, function(fit) unname(fit$tse), numeric(1))
    )
  })

  do.call(rbind, per_method)
}
# Shared builder for plot_main_effect_rep() and
# plot_main_effect_rep_default().
#
# Rescales estimates and standard errors by 100, attaches each voting method's
# pooled ("ALL") estimate and its +/- 1.96 * se band to every row of that
# method, then draws per-state bars with error bars, the pooled band as a
# ribbon and the pooled estimate as a segment. Ends with theme_minimal().
#
# plot:  data frame with columns state, point_estimate, se, positions, vtype
#        and facet_id; it must contain one state == "ALL" row per vtype.
# title: y-axis title.
.main_effect_rep_base <- function(plot, title) {
  scaled <- plot %>%
    group_by(vtype) %>%
    # mutate() evaluates in order, so the *_all columns already see the
    # rescaled point_estimate and se.
    mutate(
      point_estimate = point_estimate * 100,
      se = se * 100,
      ymin_all = point_estimate[state == "ALL"] - 1.96 * se[state == "ALL"],
      ymax_all = point_estimate[state == "ALL"] + 1.96 * se[state == "ALL"],
      point_estimate_all = point_estimate[state == "ALL"]
    )

  to_plot <- scaled %>%
    filter(state != "ALL")

  # Axis breaks and labels come from the "Any Method" rows: position -> state.
  any_method <- to_plot %>%
    filter(vtype == "Any Method")
  br <- as.character(any_method$positions)
  st <- any_method$state

  ggplot(to_plot, aes(x = factor(positions), group = vtype,
                      y = point_estimate, color = vtype, fill = vtype)) +
    # NOTE(review): 'AnyMethod' (no space) never equals the vtype value
    # "Any Method", so every bar gets width .7. Left unchanged because fixing
    # the comparison would alter the rendered figure -- confirm the intent.
    geom_bar(stat = "identity", position = position_dodge(), alpha = .2,
             width = ifelse(to_plot$vtype == 'AnyMethod', (.35), .7)) +
    geom_ribbon(aes(x = factor(positions), ymin = ymin_all,
                    ymax = ymax_all, fill = vtype), alpha = 0.1,
                position = position_dodge(width = 0.7), colour = NA) +
    ylim(-10.5, 10.5) +
    facet_grid(cols = vars(facet_id)) +
    labs(color = "Voting Method",
         fill = "Voting Method",
         y = title) +
    geom_segment(aes(group = vtype, y = point_estimate_all,
                     yend = point_estimate_all,
                     xend = 5, color = vtype),
                 position = position_dodge(width = 0.7)) +
    geom_hline(yintercept = 0, color = "black", linetype = "dotted") +
    scale_x_discrete(breaks = br, labels = st) +
    scale_color_manual(values = c("Any Method" = "#66c2a5",
                                  "In Person" = "#fc8d62",
                                  "Substitution" = "#8da0cb")) +
    scale_fill_manual(values = c("Any Method" = "#66c2a5",
                                 "In Person" = "#fc8d62",
                                 "Substitution" = "#8da0cb")) +
    scale_alpha_manual(values = c("Any Method" = .1, "In Person" = 1,
                                  "Substitution" = 1)) +
    geom_errorbar(aes(ymin = point_estimate - 1.96 * se,
                      ymax = point_estimate + 1.96 * se),
                  width = .2, position = position_dodge(width = 0.7)) +
    theme_minimal()
}

# Bar chart of per-state treatment effects by voting method, with the
# publication theme layered on top of theme_minimal().
#
# plot:  data frame with columns state, point_estimate, se, positions, vtype
#        and facet_id (the shape returned by main_effect_rep()).
# title: y-axis title.
#
# Returns the ggplot object invisibly, as the original did.
plot_main_effect_rep <- function(plot, title) {
  myplot <- .main_effect_rep_base(plot, title) +
    theme(
      panel.grid.minor = element_blank(),
      panel.border = element_blank(),
      strip.background = element_rect(fill = "#e0ecf4"),
      panel.grid.major = element_blank(),
      axis.line = element_line(colour = "black"),
      strip.text = element_text(size = 36),
      plot.title = element_text(size = 36, family = "Times New Roman"),
      text = element_text(size = 36, family = "Times New Roman"),
      axis.title.x = element_blank(),
      legend.key = element_rect(colour = NA, fill = NA),
      panel.spacing = unit(2, "lines")
    )
  invisible(myplot)
}



# Same plot as plot_main_effect_rep() but with plain theme_minimal().
#
# Returns the ggplot object invisibly, as the original did.
plot_main_effect_rep_default <- function(plot, title) {
  myplot <- .main_effect_rep_base(plot, title)
  invisible(myplot)
}
    

    # Stack the three conditional-shock panels above one shared legend.
    #
    # dist: list whose elements `1`, `2` and `3` hold the data for the effect
    #       panel, the rural-share panel and the white-share panel.
    #
    # Returns the combined cowplot grid: the effect panel takes half of the
    # height, the two share panels a quarter each, and the size legend taken
    # from the third panel sits underneath.
    combine_plots_conditional_shock <- function(dist) {
        effect_panel <- plot_shock_condition_one(
            dist$`1`,
            "Difference in vote rate\n between shocked and \nnot shocked block faces"
        )
        rural_panel <- plot_shock_condition_two(
            dist$`2`,
            "Percent of voters \nliving in rural area",
            lim_one = 0, lim_two = .33
        )
        white_panel <- plot_shock_condition_two(
            dist$`3`,
            "Percent of voters \n who are white",
            lim_one = .68, lim_two = .93
        )

        # Extract the legend before it is removed from the panels themselves.
        shared_legend <- get_legend(
            white_panel +
                theme(legend.position = c(0.55, 0.8),
                      legend.direction = "horizontal")
        )

        padded_below <- unit(c(.1, .1, 1.7, .1), "cm")
        padded_above <- unit(c(1.9, .1, .1, .1), "cm")

        rural_panel <- rural_panel +
            theme(legend.position = "none", plot.margin = padded_below)
        white_panel <- white_panel +
            theme(legend.position = "none", plot.margin = padded_above)
        effect_panel <- effect_panel +
            theme(plot.margin = padded_below)

        stacked <- plot_grid(effect_panel, rural_panel, white_panel,
                             align = "v", nrow = 3,
                             rel_heights = c(1/2, 1/4, 1/4))
        plot_grid(stacked, shared_legend, nrow = 2, align = "c",
                  rel_heights = c(1, .1))
    }
    
    # Line plot of the point estimate with a +/- 1.96 * se ribbon, publication
    # theme.
    #
    # to_plot: data frame with columns state, point_estimate and se; all three
    #          are converted with as.numeric() before plotting. Despite its
    #          name, `state` is used as the numeric x value (the axis labels
    #          are in miles).
    # title:   y-axis title.
    #
    # Returns a ggplot object. Columns are referenced by bare name inside
    # aes(); the original used to_plot$col, which ggplot2 discourages because
    # it bypasses the layer's own data.
    plot_shock_condition_one <- function(to_plot, title = "") {
        ggplot(to_plot, aes(x = state, y = point_estimate)) +
            scale_y_continuous(limits = c(-0.17, 0.07),
                               breaks = c(-0.15, -0.10, -0.05, 0, 0.05)) +
            geom_line(aes(x = as.numeric(state),
                          y = as.numeric(point_estimate)),
                      color = "#beaed4", show.legend = FALSE) +
            geom_hline(yintercept = 0, color = "#2CA25F",
                       linetype = "dotted") +
            geom_ribbon(aes(x = as.numeric(state),
                            ymin = as.numeric(point_estimate) -
                                1.96 * as.numeric(se),
                            ymax = as.numeric(point_estimate) +
                                1.96 * as.numeric(se)),
                        fill = "#fdc086", alpha = 0.3) +
            labs(x = "",
                 y = title,
                 fill = "Number of voters") +
            scale_x_continuous(breaks = c(-1, -.5, 0.0, .5, 1.0),
                               labels = c("-1", "\u2265 -.5 miles",
                                          "\u2265 0 miles",
                                          "\u2265 0.5 miles",
                                          "\u2265 1 miles")) +
            theme(panel.grid.major = element_blank(),
                  panel.grid.minor = element_blank(),
                  text = element_text(size = 36, family = "Times New Roman"),
                  panel.background = element_blank(),
                  axis.line = element_line(colour = "black"))
    }
    
    # Line-and-point plot of a population share, with points sized by the
    # `sizes` column, publication theme.
    #
    # to_plot: data frame with columns state, point_estimate and sizes.
    #          Despite its name, `state` is used as the numeric x value (the
    #          axis labels are in miles).
    # title:   y-axis title.
    # lim_one, lim_two: lower and upper y-axis limits.
    #
    # Returns a ggplot object. Two fixes relative to the original: columns are
    # referenced by bare name inside aes() instead of to_plot$col, and the
    # point layer no longer passes `size =` to as.numeric() through a
    # misplaced parenthesis. Point size is inherited from the plot-level
    # mapping, as it effectively was before.
    plot_shock_condition_two <- function(to_plot, title = "", lim_one = .05,
                                         lim_two = .2) {
        ggplot(to_plot, aes(x = state, y = point_estimate,
                            size = as.numeric(sizes))) +
            geom_line(aes(x = as.numeric(state),
                          y = as.numeric(point_estimate)),
                      color = "#66c2a5", show.legend = FALSE, size = 1) +
            geom_point(aes(x = as.numeric(state)), color = "#66c2a5") +
            scale_size_continuous(
                breaks = c(500, 2500, 25000),
                labels = c("500", "2500", "25000")) +
            scale_y_continuous(labels = percent,
                               limits = c(lim_one, lim_two)) +
            labs(x = "",
                 y = title,
                 fill = "Number of voters",
                 size = "Number of voters") +
            scale_x_continuous(breaks = c(-1, -.5, 0.0, .5, 1.0),
                               labels = c("-1", "\u2265 -.5 miles",
                                          "\u2265 0 miles",
                                          "\u2265 0.5 miles",
                                          "\u2265 1 miles")) +
            # NOTE(review): legend.title.align is a theme() element rather
            # than a documented guide_legend() argument; title.hjust may have
            # been intended. Left unchanged to keep the figure as it was.
            guides(size = guide_legend(title.position = "top",
                                       legend.title.align = 0.5)) +
            theme(panel.grid.major = element_blank(),
                  panel.grid.minor = element_blank(),
                  legend.position = "bottom",
                  legend.key = element_rect(fill = NA, color = "black"),
                  text = element_text(size = 36, family = "Times New Roman"),
                  panel.background = element_blank(),
                  axis.line = element_line(colour = "black"))
    }

    # Stack the three default-themed conditional-shock panels above one shared
    # legend.
    #
    # dist: list whose elements `1`, `2` and `3` hold the data for the effect
    #       panel, the rural-share panel and the white-share panel.
    #
    # Returns the combined cowplot grid: the effect panel takes half of the
    # height, the two share panels a quarter each, and the size legend taken
    # from the third panel sits underneath.
    combine_plots_conditional_shock_default <- function(dist) {
      effect_panel <- plot_shock_condition_one_default(
        dist$`1`,
        "Difference in vote rate\n between shocked and \nnot shocked block faces"
      )
      rural_panel <- plot_shock_condition_two_default(
        dist$`2`,
        "Percent of voters \nliving in rural area",
        lim_one = 0, lim_two = .33
      )
      white_panel <- plot_shock_condition_two_default(
        dist$`3`,
        "Percent of voters \n who are white",
        lim_one = .68, lim_two = .93
      )

      # Extract the legend before it is removed from the panels themselves.
      shared_legend <- get_legend(
        white_panel +
          theme(legend.position = c(0.55, 0.8),
                legend.direction = "horizontal")
      )

      padded_below <- unit(c(.1, .1, 1.7, .1), "cm")
      padded_above <- unit(c(1.9, .1, .1, .1), "cm")

      rural_panel <- rural_panel +
        theme(legend.position = "none", plot.margin = padded_below)
      white_panel <- white_panel +
        theme(legend.position = "none", plot.margin = padded_above)
      effect_panel <- effect_panel +
        theme(plot.margin = padded_below)

      stacked <- plot_grid(effect_panel, rural_panel, white_panel,
                           align = "v", nrow = 3,
                           rel_heights = c(1/2, 1/4, 1/4))
      plot_grid(stacked, shared_legend, nrow = 2, align = "c",
                rel_heights = c(1, .1))
    }
    
    # Line plot of the point estimate with a +/- 1.96 * se ribbon, using
    # ggplot2's default theme.
    #
    # to_plot: data frame with columns state, point_estimate and se; all three
    #          are converted with as.numeric() before plotting. Despite its
    #          name, `state` is used as the numeric x value (the axis labels
    #          are in miles).
    # title:   y-axis title.
    #
    # Returns a ggplot object. Columns are referenced by bare name inside
    # aes(); the original used to_plot$col, which ggplot2 discourages because
    # it bypasses the layer's own data.
    plot_shock_condition_one_default <- function(to_plot, title = "") {
      ggplot(to_plot, aes(x = state, y = point_estimate)) +
        scale_y_continuous(limits = c(-0.17, 0.07),
                           breaks = c(-0.15, -0.10, -0.05, 0, 0.05)) +
        geom_line(aes(x = as.numeric(state),
                      y = as.numeric(point_estimate)),
                  color = "#beaed4", show.legend = FALSE) +
        geom_hline(yintercept = 0, color = "#2CA25F", linetype = "dotted") +
        geom_ribbon(aes(x = as.numeric(state),
                        ymin = as.numeric(point_estimate) -
                          1.96 * as.numeric(se),
                        ymax = as.numeric(point_estimate) +
                          1.96 * as.numeric(se)),
                    fill = "#fdc086", alpha = 0.3) +
        labs(x = "",
             y = title,
             fill = "Number of voters") +
        scale_x_continuous(breaks = c(-1, -.5, 0.0, .5, 1.0),
                           labels = c("-1", "\u2265 -.5 miles",
                                      "\u2265 0 miles", "\u2265 0.5 miles",
                                      "\u2265 1 miles"))
    }
    
    # Line-and-point plot of a population share, with points sized by the
    # `sizes` column, using ggplot2's default theme.
    #
    # to_plot: data frame with columns state, point_estimate and sizes.
    #          Despite its name, `state` is used as the numeric x value (the
    #          axis labels are in miles).
    # title:   y-axis title.
    # lim_one, lim_two: lower and upper y-axis limits.
    #
    # Returns a ggplot object. Two fixes relative to the original: columns are
    # referenced by bare name inside aes() instead of to_plot$col, and the
    # point layer no longer passes `size =` to as.numeric() through a
    # misplaced parenthesis. Point size is inherited from the plot-level
    # mapping, as it effectively was before.
    plot_shock_condition_two_default <- function(to_plot, title = "",
                                                 lim_one = .05,
                                                 lim_two = .2) {
      ggplot(to_plot, aes(x = state, y = point_estimate,
                          size = as.numeric(sizes))) +
        geom_line(aes(x = as.numeric(state),
                      y = as.numeric(point_estimate)),
                  color = "#66c2a5", show.legend = FALSE, size = 1) +
        geom_point(aes(x = as.numeric(state)), color = "#66c2a5") +
        scale_size_continuous(
          breaks = c(500, 2500, 25000),
          labels = c("500", "2500", "25000")) +
        scale_y_continuous(labels = percent, limits = c(lim_one, lim_two)) +
        labs(x = "",
             y = title,
             fill = "Number of voters",
             size = "Number of voters") +
        scale_x_continuous(breaks = c(-1, -.5, 0.0, .5, 1.0),
                           labels = c("-1", "\u2265 -.5 miles",
                                      "\u2265 0 miles", "\u2265 0.5 miles",
                                      "\u2265 1 miles")) +
        # NOTE(review): legend.title.align is a theme() element rather than a
        # documented guide_legend() argument; title.hjust may have been
        # intended. Left unchanged to keep the figure as it was.
        guides(size = guide_legend(title.position = "top",
                                   legend.title.align = 0.5))
    }
    
# Scatter plot of per-block average polling-place distance: control-group
# mean on the x axis against either the treatment-minus-control difference
# (default) or the treatment-group mean (analysis = "shock") on the y axis.
#
# Args:
#   df:       data frame with columns `block_id`, `treatment` (values
#             'control' / 'treatment') and `distance_to_pp_2016`.
#   analysis: "shock" plots `distance_treatment`; any other value plots
#             `distance_diff`.
# Returns: a ggplot object. Points show a random sample of at most 1000
#   blocks (uses the RNG); the lm smoother is fitted on all blocks.
distance_to_polling_place_scatter_rep <- function(df, analysis = "distance") {
  ytitle <- "Average additional distance\n of treatment group (miles)"
  ycol <- "distance_diff"
  if (analysis == "shock") {
    ytitle <- "Average distance of treatment group (miles)"
    ycol <- "distance_treatment"
  }

  # One row per block with the mean distance in each arm and their difference.
  block_means <- df %>%
    group_by(block_id) %>%
    summarize(
      distance_control = mean(distance_to_pp_2016[treatment == 'control']),
      distance_treatment = mean(distance_to_pp_2016[treatment == 'treatment']),
      distance_diff = distance_treatment - distance_control
    )

  # Cap the sample at the number of available blocks so small inputs do not
  # make sample() fail; indexing through sample.int() also avoids sample()'s
  # special handling of a single numeric value.
  block_ids <- unique(df$block_id)
  n_shown <- min(1000, length(block_ids))
  shown_ids <- block_ids[sample.int(length(block_ids), n_shown)]
  shown <- block_means %>%
    filter(block_id %in% shown_ids)

  ggplot(data = shown, aes(x = distance_control, y = !!sym(ycol))) +
    geom_point(color = "#fc8d59", alpha = .3, size = 2) +
    xlim(0, 2.1) + ylim(0, 1.5) + coord_fixed() +
    geom_smooth(data = block_means,
                aes(x = distance_control, y = !!sym(ycol)),
                color = "#66c2a5", fill = "#8da0cb", method = 'lm') +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          axis.line = element_line(colour = "black"),
          text = element_text(size = 24, family = "Times New Roman"),
          legend.key = element_rect(colour = NA, fill = NA),
          strip.background = element_blank(),
          strip.text = element_blank(),
          panel.spacing = unit(2, "lines")) +
    labs(x = "Average distance of control group (miles)",
         y = ytitle,
         fill = "Number of blocks")
}


# Default-theme variant of the per-block distance scatter plot: control-group
# mean distance on the x axis against either the treatment-minus-control
# difference (default) or the treatment-group mean (analysis = "shock").
#
# Args:
#   df:       data frame with columns `block_id`, `treatment` (values
#             'control' / 'treatment') and `distance_to_pp_2016`.
#   analysis: "shock" plots `distance_treatment`; any other value plots
#             `distance_diff`.
# Returns: a ggplot object. Points show a random sample of at most 1000
#   blocks (uses the RNG); the lm smoother is fitted on all blocks.
distance_to_polling_place_scatter_rep_default <- function(df, analysis = "distance") {
  ytitle <- "Average additional distance\n of treatment group (miles)"
  ycol <- "distance_diff"
  if (analysis == "shock") {
    ytitle <- "Average distance of treatment group (miles)"
    ycol <- "distance_treatment"
  }

  # One row per block with the mean distance in each arm and their difference.
  block_means <- df %>%
    group_by(block_id) %>%
    summarize(
      distance_control = mean(distance_to_pp_2016[treatment == 'control']),
      distance_treatment = mean(distance_to_pp_2016[treatment == 'treatment']),
      distance_diff = distance_treatment - distance_control
    )

  # Cap the sample at the number of available blocks so small inputs do not
  # make sample() fail; indexing through sample.int() also avoids sample()'s
  # special handling of a single numeric value.
  block_ids <- unique(df$block_id)
  n_shown <- min(1000, length(block_ids))
  shown_ids <- block_ids[sample.int(length(block_ids), n_shown)]
  shown <- block_means %>%
    filter(block_id %in% shown_ids)

  ggplot(data = shown, aes(x = distance_control, y = !!sym(ycol))) +
    geom_point(color = "#fc8d59", alpha = .3, size = 2) +
    xlim(0, 2.1) + ylim(0, 1.5) + coord_fixed() +
    geom_smooth(data = block_means,
                aes(x = distance_control, y = !!sym(ycol)),
                color = "#66c2a5", fill = "#8da0cb", method = 'lm') +
    labs(x = "Average distance of control group (miles)",
         y = ytitle,
         fill = "Number of blocks")
}

# For each threshold in seq(-.5, 1.0, .2), estimate the treatment effect on
# turnout among blocks whose mean change in distance is at least the
# threshold, and summarise the composition of the matching rows of df_two.
#
# Args:
#   df_one: voter-level data with `voted_2016`, `block_id`, `treatment`,
#           `change_in_distance`, plus whatever get_se_pe() needs.
#   df_two: data with `block_id`, `treatment`, `change_in_distance`,
#           `num_people`, `num_rural` and `num_white`.
# Returns: list with elements "1" (state, point_estimate, se), "2" (state,
#   rural share, sizes) and "3" (state, white share, sizes); `state` holds the
#   thresholds as character strings.
conditional_distance_replication <- function(df_one, df_two) {
  voted_codes <- c('p')
  thresholds <- seq(-.5, 1.0, .2)

  # Voter-level data with a 0/1 turnout flag and the block mean of
  # change_in_distance attached to every row.
  voters <- df_one %>%
    mutate(voted = ifelse(voted_2016 %in% voted_codes, 1, 0)) %>%
    group_by(block_id) %>%
    mutate(extra = mean(change_in_distance, na.rm = TRUE))

  # Here the block mean uses treatment rows only.
  blocks <- df_two %>%
    group_by(block_id) %>%
    mutate(extra = mean(change_in_distance[treatment == 'treatment'], na.rm = TRUE))

  per_threshold <- lapply(thresholds, function(w) {
    # Keep every row of any block that has a treatment row at or above w.
    kept_ids <- filter(voters, treatment == 'treatment' & extra >= w)$block_id
    fit <- get_se_pe(filter(voters, block_id %in% kept_ids))

    covered <- filter(blocks, extra >= w)
    people <- sum(covered$num_people)

    list(pe = fit$pe,
         tse = fit$tse,
         rural = sum(covered$num_rural) / people,
         people = people,
         white = sum(covered$num_white) / people)
  })

  # Concatenate one field across thresholds (NULL when there are none).
  gather <- function(field) {
    unlist(lapply(per_threshold, function(res) res[[field]]))
  }

  state <- toupper(thresholds)
  people <- gather("people")

  estimates <- tibble(state = state,
                      point_estimate = gather("pe"),
                      se = gather("tse"))
  rural_share <- tibble(state = state,
                        point_estimate = gather("rural"),
                        sizes = people)
  white_share <- tibble(state = state,
                        point_estimate = gather("white"),
                        sizes = people)

  list("1" = estimates, "2" = rural_share, "3" = white_share)
}

# Block-level balance plot: bins blocks by `total_people` and plots, per bin,
# the relative difference between the treatment and control shares of blocks,
# (treatment share - control share) / control share. Point size is the number
# of control rows in the bin.
#
# Args:
#   df:       data frame with `state`, `county`, `block_id`, `treatment`
#             (values "treatment" / "control") and `total_in_assignment`.
#   analysis: not used by this function.
#   breaks, labels: passed to scale_size_continuous().
# Returns: a themed ggplot object.
balance_plot_secondary_diff <- function(df, analysis = 'shock', breaks = c(), labels = c()) {

  blevels <- c("5> Population", "10> Population \u2265 5",
               "20> Population \u2265 10",
               "Population \u2265 20")

  # One row per state/county/block/treatment with its minimum assignment size.
  df_blocks <- df %>%
    group_by(state, county, block_id, treatment) %>%
    summarize(total_people = min(total_in_assignment))

  df_blocks <- df_blocks %>%
    mutate(Population = case_when(total_people < 5 ~ "5> Population",
                                  total_people >= 5 & total_people < 10 ~ "10> Population \u2265 5",
                                  total_people >= 10 & total_people < 20 ~ "20> Population \u2265 10",
                                  total_people >= 20 ~ "Population \u2265 20")) %>%
    mutate(Population = factor(Population, levels = blevels))

  col_names <- c("Population")
  combined_df <- tibble()
  for (col_name in col_names) {
    col_var <- sym(col_name)

    df_temp <- df_blocks %>%
      group_by(treatment) %>%
      count(!!col_var) %>%
      mutate(total = sum(n),
             proportion = n / total) %>%
      # Inject the symbol; a bare `col_var` would be treated as an external
      # selection vector, and a symbol is not a valid one.
      rename(x = !!col_var) %>%
      mutate(group_id = simpleCap(col_name)) %>%
      ungroup() %>%
      group_by(x) %>%
      summarize(diff = (proportion[treatment == "treatment"] - proportion[treatment == "control"])
                / proportion[treatment == "control"],
                group_id = group_id[1],
                dot_size = n[treatment == "control"])

    combined_df <- rbind.data.frame(combined_df, df_temp)
  }

  p <- ggplot(combined_df, aes(x = x, y = diff)) +
    geom_line(aes(group = 1), na.rm = TRUE, color = "black", linetype = "dashed") +
    labs(size = "Number of \nblock faces\n",
         y = "Relative difference between \ntreatment and control") +
    scale_size_continuous(breaks = breaks,
                          labels = labels) +
    scale_y_continuous(labels = percent, limits = c(-.3, .3)) +
    guides(size = guide_legend(title.position = "top", title.hjust = 0.1)) +
    scale_color_manual(values = c("black",
                                  "#fc8d62"), name = "Treatment Assignment\n") +
    scale_linetype_manual(values = c("dashed", "solid"), name = "Treatment Assignment\n") +
    scale_shape_manual(values = c(2, 19), name = "Treatment Assignment\n") +
    geom_point(na.rm = TRUE, color = "black", aes(size = dot_size)) +
    facet_wrap(~ group_id, scales = "free", ncol = 5, drop = FALSE) +
    theme(axis.text.x = element_text(angle = 40, vjust = .7, hjust = .7),
          panel.grid.minor = element_blank(),
          panel.border = element_blank(), axis.title.x = element_blank(),
          strip.background = element_rect(fill = "#e0ecf4"),
          panel.grid.major = element_blank(),
          panel.background = element_blank(), axis.line = element_line(colour = "black"),
          legend.background = element_blank(),
          legend.position = "bottom",
          legend.key = element_rect(colour = "black", fill = NA),
          legend.key.size = unit(2, "line"),
          strip.text = element_text(size = 38),
          text = element_text(size = 32, family = "Times New Roman"))
  p
}
# Registrant-level balance plot: for age bin, gender, party and race, plots
# the relative difference between the treatment and control shares of each
# category, (treatment share - control share) / control share, one facet per
# variable. Point size is the control-arm count of the category.
#
# Args:
#   df:       data frame with `age`, `gender`, `party`, `race` and `treatment`
#             (values "treatment" / "control").
#   analysis: not used by this function.
#   breaks, labels: passed to scale_size_continuous().
# Returns: a themed ggplot object.
balance_plot_main_diff <- function(df, analysis = 'shock', breaks = c(), labels = c()) {

  df_plot <- df %>%
    bin_age() %>%
    filter(!is.na(age))

  # Collapse categories and title-case the labels. Any gender other than
  # "female" is recoded to "male".
  df_plot <- df_plot %>%
    mutate(party = ifelse(party %in% c("democrat", "republican"), party, "other")) %>%
    mutate(race = ifelse(race %in% c("african-american", "asian", "caucasian", "hispanic"),
                         race, "other")) %>%
    mutate(race = str_to_title(race)) %>%
    mutate(gender = ifelse(gender %in% c("female"), gender,
                           "male")) %>%
    mutate(gender = sapply(gender, simpleCap)) %>%
    mutate(party = sapply(party, simpleCap)) %>%
    mutate(race = ifelse(race %in% c("African-American"),
                         "Black", race)) %>%
    mutate(race = ifelse(race %in% c("Caucasian"),
                         "White", race)) %>%
    mutate(party = factor(party, levels = c("Democrat", "Republican", "Other")))

  col_names <- c("age", "gender", "party", "race")
  combined_df <- tibble()

  for (col_name in col_names) {
    col_var <- sym(col_name)
    if (col_name %in% discrete_var) {
      df_plot <- df_plot %>%
        mutate(!!col_var := factor(!!col_var))
    }
    df_temp <- df_plot %>%
      group_by(treatment) %>%
      count(!!col_var) %>%
      mutate(total = sum(n),
             proportion = n / total) %>%
      # Inject the symbol; a bare `col_var` would be treated as an external
      # selection vector, and a symbol is not a valid one.
      rename(x = !!col_var) %>%
      mutate(group_id = simpleCap(col_name)) %>%
      group_by(x) %>%
      summarize(
        t = total[treatment == "treatment"],
        tc = total[treatment == "control"],
        t_prop = proportion[treatment == "treatment"],
        c_prop = proportion[treatment == "control"],
        diff = (proportion[treatment == "treatment"] - proportion[treatment == "control"])
        / proportion[treatment == "control"],
        group_id = group_id[1],
        dot_size = n[treatment == "control"])
    if (col_name == 'party') {
      # NOTE(review): this level set spans all four variables and contains
      # "Other " with a trailing space; it presumably also steers the level
      # order of the combined factor after rbind -- confirm before changing.
      df_temp <- df_temp %>%
        mutate(x = factor(x, levels = c("Democrat", "Republican", "Other ", "Female", "Male",
                                        "18-25", "25-35", "35-50", "50-70", "70+",
                                        "White", "Black", "Hispanic", "Asian", "Other"), ordered = TRUE))
    } else {
      df_temp <- df_temp %>%
        mutate(x = factor(x))
    }
    combined_df <- rbind.data.frame(combined_df, df_temp)
  }

  p <- ggplot(combined_df, aes(x = x, y = diff)) +
    geom_line(aes(x = x, y = diff, group = 1), na.rm = TRUE, color = "black", linetype = "dashed") +
    labs(size = "Number of registrants\n",
         y = "Relative difference between \ntreatment and control ") +
    scale_y_continuous(labels = percent, limits = c(-.4, .4)) +
    scale_size_continuous(breaks = breaks,
                          labels = labels) +
    geom_point(na.rm = TRUE, color = "black", aes(size = dot_size)) +
    facet_wrap(~ group_id, scales = "free", ncol = 5, drop = FALSE) +
    guides(size = guide_legend(title.position = "top", title.hjust = 0.1)) +
    theme(axis.text.x = element_text(angle = 40, vjust = .7, hjust = .7),
          panel.grid.minor = element_blank(),
          panel.border = element_blank(), axis.title.x = element_blank(),
          strip.background = element_rect(fill = "#e0ecf4"),
          panel.grid.major = element_blank(),
          panel.background = element_blank(), axis.line = element_line(colour = "black"),
          legend.position = "bottom",
          legend.background = element_blank(),
          legend.key = element_rect(colour = "black", fill = NA),
          legend.key.size = unit(2, "line"),
          strip.text = element_text(size = 40),
          text = element_text(size = 36, family = "Times New Roman"))
  p
}


# Build the registrant-level and block-level balance plots, place them side
# by side, draw the result on the current graphics device and return it.
#
# Args:
#   df:        data passed to both balance plot functions.
#   analysis:  forwarded to both plot functions.
#   save_name: not used by this function.
#   b_one, l_one: size-legend breaks/labels for the registrant-level plot.
#   b_two, l_two: size-legend breaks/labels for the block-level plot.
# Returns: the arranged grob (as produced by gridExtra::arrangeGrob).
save_balance_plot_diffs <- function(df, analysis = "shock", save_name = "",
                                    b_one = c(), b_two = c(), l_one = c(), l_two = c()) {

  main_plot <- balance_plot_main_diff(df, analysis, breaks = b_one, labels = l_one)
  block_plot <- balance_plot_secondary_diff(df, analysis, breaks = b_two, labels = l_two)

  block_grob <- ggplotGrob(block_plot + theme(plot.margin = margin(1, 1, 1, 1, unit = "cm")))
  main_grob <- ggplotGrob(main_plot + theme(plot.margin = margin(1, 1, 1, 1, unit = "cm")))

  # Registrant-level panels on the left, block-level panel on the right.
  side_by_side <- gridExtra::gtable_rbind(gtable_cbind(main_grob, block_grob))

  # grid.arrange() draws the layout and invisibly returns the same grob that
  # arrangeGrob() would build, so the layout is constructed only once.
  g <- grid.arrange(side_by_side,
                    heights = c(1.1, .1, 0.1),
                    nrow = 3)
  g
}

# Default-theme block-level balance plot: bins blocks by `total_people` and
# plots, per bin, the relative difference between the treatment and control
# shares of blocks, (treatment share - control share) / control share. Point
# size is the number of control rows in the bin.
#
# Args:
#   df:       data frame with `state`, `county`, `block_id`, `treatment`
#             (values "treatment" / "control") and `total_in_assignment`.
#   analysis: not used by this function.
#   breaks, labels: passed to scale_size_continuous().
# Returns: a ggplot object.
balance_plot_secondary_diff_default <- function(df, analysis = 'shock', breaks = c(), labels = c()) {

  blevels <- c("5> Population", "10> Population \u2265 5",
               "20> Population \u2265 10",
               "Population \u2265 20")

  # One row per state/county/block/treatment with its minimum assignment size.
  df_blocks <- df %>%
    group_by(state, county, block_id, treatment) %>%
    summarize(total_people = min(total_in_assignment))

  df_blocks <- df_blocks %>%
    mutate(Population = case_when(total_people < 5 ~ "5> Population",
                                  total_people >= 5 & total_people < 10 ~ "10> Population \u2265 5",
                                  total_people >= 10 & total_people < 20 ~ "20> Population \u2265 10",
                                  total_people >= 20 ~ "Population \u2265 20")) %>%
    mutate(Population = factor(Population, levels = blevels))

  col_names <- c("Population")
  combined_df <- tibble()
  for (col_name in col_names) {
    col_var <- sym(col_name)

    df_temp <- df_blocks %>%
      group_by(treatment) %>%
      count(!!col_var) %>%
      mutate(total = sum(n),
             proportion = n / total) %>%
      # Inject the symbol; a bare `col_var` would be treated as an external
      # selection vector, and a symbol is not a valid one.
      rename(x = !!col_var) %>%
      mutate(group_id = simpleCap(col_name)) %>%
      ungroup() %>%
      group_by(x) %>%
      summarize(diff = (proportion[treatment == "treatment"] - proportion[treatment == "control"])
                / proportion[treatment == "control"],
                group_id = group_id[1],
                dot_size = n[treatment == "control"])

    combined_df <- rbind.data.frame(combined_df, df_temp)
  }

  p <- ggplot(combined_df, aes(x = x, y = diff)) +
    geom_line(aes(group = 1), na.rm = TRUE, color = "black", linetype = "dashed") +
    labs(size = "Number of \nblock faces\n",
         y = "Relative difference between \ntreatment and control") +
    scale_size_continuous(breaks = breaks,
                          labels = labels) +
    scale_y_continuous(labels = percent, limits = c(-.3, .3)) +
    guides(size = guide_legend(title.position = "top", title.hjust = 0.1)) +
    scale_color_manual(values = c("black",
                                  "#fc8d62"), name = "Treatment Assignment\n") +
    scale_linetype_manual(values = c("dashed", "solid"), name = "Treatment Assignment\n") +
    scale_shape_manual(values = c(2, 19), name = "Treatment Assignment\n") +
    geom_point(na.rm = TRUE, color = "black", aes(size = dot_size)) +
    facet_wrap(~ group_id, scales = "free", ncol = 5, drop = FALSE)
  p
}
# Default-theme registrant-level balance plot: for age bin, gender, party and
# race, plots the relative difference between the treatment and control
# shares of each category, (treatment share - control share) / control share,
# one facet per variable. Point size is the control-arm count of the category.
#
# Args:
#   df:       data frame with `age`, `gender`, `party`, `race` and `treatment`
#             (values "treatment" / "control").
#   analysis: not used by this function.
#   breaks, labels: passed to scale_size_continuous().
# Returns: a ggplot object.
balance_plot_main_diff_default <- function(df, analysis = 'shock', breaks = c(), labels = c()) {

  df_plot <- df %>%
    bin_age() %>%
    filter(!is.na(age))

  # Collapse categories and title-case the labels. Any gender other than
  # "female" is recoded to "male".
  df_plot <- df_plot %>%
    mutate(party = ifelse(party %in% c("democrat", "republican"), party, "other")) %>%
    mutate(race = ifelse(race %in% c("african-american", "asian", "caucasian", "hispanic"),
                         race, "other")) %>%
    mutate(race = str_to_title(race)) %>%
    mutate(gender = ifelse(gender %in% c("female"), gender,
                           "male")) %>%
    mutate(gender = sapply(gender, simpleCap)) %>%
    mutate(party = sapply(party, simpleCap)) %>%
    mutate(race = ifelse(race %in% c("African-American"),
                         "Black", race)) %>%
    mutate(race = ifelse(race %in% c("Caucasian"),
                         "White", race)) %>%
    mutate(party = factor(party, levels = c("Democrat", "Republican", "Other")))

  col_names <- c("age", "gender", "party", "race")
  combined_df <- tibble()

  for (col_name in col_names) {
    col_var <- sym(col_name)
    if (col_name %in% discrete_var) {
      df_plot <- df_plot %>%
        mutate(!!col_var := factor(!!col_var))
    }
    df_temp <- df_plot %>%
      group_by(treatment) %>%
      count(!!col_var) %>%
      mutate(total = sum(n),
             proportion = n / total) %>%
      # Inject the symbol; a bare `col_var` would be treated as an external
      # selection vector, and a symbol is not a valid one.
      rename(x = !!col_var) %>%
      mutate(group_id = simpleCap(col_name)) %>%
      group_by(x) %>%
      summarize(
        t = total[treatment == "treatment"],
        tc = total[treatment == "control"],
        t_prop = proportion[treatment == "treatment"],
        c_prop = proportion[treatment == "control"],
        diff = (proportion[treatment == "treatment"] - proportion[treatment == "control"])
        / proportion[treatment == "control"],
        group_id = group_id[1],
        dot_size = n[treatment == "control"])
    if (col_name == 'party') {
      # NOTE(review): this level set spans all four variables and contains
      # "Other " with a trailing space; it presumably also steers the level
      # order of the combined factor after rbind -- confirm before changing.
      df_temp <- df_temp %>%
        mutate(x = factor(x, levels = c("Democrat", "Republican", "Other ", "Female", "Male",
                                        "18-25", "25-35", "35-50", "50-70", "70+",
                                        "White", "Black", "Hispanic", "Asian", "Other"), ordered = TRUE))
    } else {
      df_temp <- df_temp %>%
        mutate(x = factor(x))
    }
    combined_df <- rbind.data.frame(combined_df, df_temp)
  }

  p <- ggplot(combined_df, aes(x = x, y = diff)) +
    geom_line(aes(x = x, y = diff, group = 1), na.rm = TRUE, color = "black", linetype = "dashed") +
    labs(size = "Number of registrants\n",
         y = "Relative difference between \ntreatment and control ") +
    scale_y_continuous(labels = percent, limits = c(-.4, .4)) +
    scale_size_continuous(breaks = breaks,
                          labels = labels) +
    geom_point(na.rm = TRUE, color = "black", aes(size = dot_size)) +
    facet_wrap(~ group_id, scales = "free", ncol = 5, drop = FALSE) +
    guides(size = guide_legend(title.position = "top", title.hjust = 0.1))
  p
}


# Build the default-theme registrant-level and block-level balance plots,
# place them side by side, draw the result on the current graphics device and
# return it.
#
# Args:
#   df:        data passed to both balance plot functions.
#   analysis:  forwarded to both plot functions.
#   save_name: not used by this function.
#   b_one, l_one: size-legend breaks/labels for the registrant-level plot.
#   b_two, l_two: size-legend breaks/labels for the block-level plot.
# Returns: the arranged grob (as produced by gridExtra::arrangeGrob).
save_balance_plot_diffs_default <- function(df, analysis = "shock", save_name = "",
                                            b_one = c(), b_two = c(), l_one = c(), l_two = c()) {

  main_plot <- balance_plot_main_diff_default(df, analysis, breaks = b_one, labels = l_one)
  block_plot <- balance_plot_secondary_diff_default(df, analysis, breaks = b_two, labels = l_two)

  block_grob <- ggplotGrob(block_plot + theme(plot.margin = margin(1, 1, 1, 1, unit = "cm")))
  main_grob <- ggplotGrob(main_plot + theme(plot.margin = margin(1, 1, 1, 1, unit = "cm")))

  # Registrant-level panels on the left, block-level panel on the right.
  side_by_side <- gridExtra::gtable_rbind(gtable_cbind(main_grob, block_grob))

  # grid.arrange() draws the layout and invisibly returns the same grob that
  # arrangeGrob() would build, so the layout is constructed only once.
  g <- grid.arrange(side_by_side,
                    heights = c(1.1, .1, 0.1),
                    nrow = 3)
  g
}




# Plot point estimates with a 95% band against the numeric value of `state`,
# with the publication theme (Times New Roman, no grid).
#
# Args:
#   to_plot: data frame with columns `state`, `point_estimate` and `se`
#            (each is coerced with as.numeric()).
#   title:   y-axis label.
# Returns: a ggplot object.
plot_distance_condition_one <- function(to_plot, title = "") {
  # Columns are mapped by bare name; `to_plot$...` inside aes() bypasses the
  # layer data and is discouraged by ggplot2.
  ggplot(to_plot, aes(x = as.numeric(state), y = as.numeric(point_estimate))) +
    geom_line(color = "#beaed4", show.legend = FALSE) +
    geom_hline(yintercept = 0, color = "#2CA25F", linetype = "dotted") +
    # Band is point_estimate +/- 1.96 * se.
    geom_ribbon(aes(ymin = as.numeric(point_estimate) - 1.96 * as.numeric(se),
                    ymax = as.numeric(point_estimate) + 1.96 * as.numeric(se)),
                fill = "#fdc086", alpha = 0.3) +
    labs(x = "",
         y = title,
         fill = "Number of voters") +
    scale_x_continuous(breaks = c(0, .5, 1.0, 1.5, 2),
                       labels = c("0", "\u2265 .5 miles", "\u2265 1 miles",
                                  "\u2265 1.5 miles", "\u2265 2 miles")) +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          text = element_text(size = 36, family = "Times New Roman"),
          panel.background = element_blank(), axis.line = element_line(colour = "black"))
}


# Line-and-point plot of a share (`point_estimate`) against the numeric value
# of `state`, with point size mapped to `sizes`; publication theme with the
# size legend at the bottom.
#
# Args:
#   to_plot: data frame with columns `state`, `point_estimate` and `sizes`.
#   title:   y-axis label.
#   lim_one, lim_two: lower and upper y-axis limits.
# Returns: a ggplot object.
plot_distance_condition_two <- function(to_plot, title = "", lim_one = .05, lim_two = .2) {
  # All mappings live in the top-level aes() and use bare column names;
  # `to_plot$...` inside aes() is discouraged by ggplot2.
  ggplot(to_plot, aes(x = as.numeric(state),
                      y = as.numeric(point_estimate),
                      size = as.numeric(sizes))) +
    # The constant size = 1 overrides the inherited size mapping for the line.
    geom_line(color = "#66c2a5", show.legend = FALSE, size = 1) +
    geom_point(color = "#66c2a5") +
    scale_size_continuous(breaks = c(25000, 50000, 200000),
                          labels = c("25,000", "50,000", "200,000")) +
    # title.hjust is the guide_legend() argument that centres the title.
    guides(size = guide_legend(title.position = "top", title.hjust = 0.5)) +
    scale_y_continuous(labels = percent, limits = c(lim_one, lim_two)) +
    labs(x = "",
         y = title,
         fill = "Number of voters",
         size = "Number of voters") +
    scale_x_continuous(breaks = c(0, .5, 1.0, 1.5, 2),
                       labels = c("0", "\u2265 .5 miles", "\u2265 1 miles",
                                  "\u2265 1.5 miles", "\u2265 2 miles")) +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          legend.position = "bottom",
          legend.key = element_rect(fill = NA, color = "black"),
          text = element_text(size = 36, family = "Times New Roman"),
          panel.background = element_blank(), axis.line = element_line(colour = "black"))
}

# Stack the three conditional-distance panels (effect estimate, rural share,
# white share) vertically and put a shared size legend underneath.
#
# Args:
#   dist: list with elements `1`, `2` and `3`, as returned by
#         conditional_distance_replication_windows().
# Returns: a cowplot grid object.
combine_plots_conditional_distance <- function(dist) {

  to_plot <- dist$`1`
  to_plot2 <- dist$`2`
  to_plot3 <- dist$`3`

  p1 <- plot_distance_condition_one(
    to_plot,
    title = "Difference in vote rate\n between farther and closer \n block faces")
  p2 <- plot_distance_condition_two(
    to_plot2,
    title = "Percent of voters \nliving in rural area",
    lim_one = .05, lim_two = .20)
  p3 <- plot_distance_condition_two(
    to_plot3,
    title = "Percent of voters\n who are white",
    lim_one = .70, lim_two = .93)

  # Take the legend from p2 before it is removed from the panels themselves.
  legend <- get_legend(p2 + theme(legend.position = c(0.55, 0.8),
                                  legend.direction = "horizontal"))
  p2 <- p2 + theme(legend.position = "none", plot.margin = unit(c(.1, .1, 2.7, .1), "cm"))
  p3 <- p3 + theme(legend.position = "none", plot.margin = unit(c(2.7, .1, 1, .1), "cm"))
  p1 <- p1 + theme(plot.margin = unit(c(.1, .1, 2.7, .1), "cm"))

  g <- plot_grid(p1, p2, p3, align = "v", nrow = 3, rel_heights = c(1 / 2, 1 / 4, 1 / 4))
  # The legend row needs no alignment with the panel stack.
  p <- plot_grid(g, legend, nrow = 2, align = "none", rel_heights = c(1, .1))
  p
}


# Plot point estimates with a 95% band against the numeric value of `state`,
# using the default ggplot theme.
#
# Args:
#   to_plot: data frame with columns `state`, `point_estimate` and `se`
#            (each is coerced with as.numeric()).
#   title:   y-axis label.
# Returns: a ggplot object.
plot_distance_condition_one_default <- function(to_plot, title = "") {
  # Columns are mapped by bare name; `to_plot$...` inside aes() bypasses the
  # layer data and is discouraged by ggplot2.
  ggplot(to_plot, aes(x = as.numeric(state), y = as.numeric(point_estimate))) +
    geom_line(color = "#beaed4", show.legend = FALSE) +
    geom_hline(yintercept = 0, color = "#2CA25F", linetype = "dotted") +
    # Band is point_estimate +/- 1.96 * se.
    geom_ribbon(aes(ymin = as.numeric(point_estimate) - 1.96 * as.numeric(se),
                    ymax = as.numeric(point_estimate) + 1.96 * as.numeric(se)),
                fill = "#fdc086", alpha = 0.3) +
    labs(x = "",
         y = title,
         fill = "Number of voters") +
    scale_x_continuous(breaks = c(0, .5, 1.0, 1.5, 2),
                       labels = c("0", "\u2265 .5 miles", "\u2265 1 miles",
                                  "\u2265 1.5 miles", "\u2265 2 miles"))
}


# Line-and-point plot of a share (`point_estimate`) against the numeric value
# of `state`, with point size mapped to `sizes`. Default ggplot theme.
#
# Args:
#   to_plot: data frame with columns `state`, `point_estimate` and `sizes`.
#   title:   y-axis label.
#   lim_one, lim_two: lower and upper y-axis limits.
# Returns: a ggplot object.
plot_distance_condition_two_default <- function(to_plot, title = "", lim_one = .05, lim_two = .2) {
  # All mappings live in the top-level aes() and use bare column names;
  # `to_plot$...` inside aes() is discouraged by ggplot2.
  ggplot(to_plot, aes(x = as.numeric(state),
                      y = as.numeric(point_estimate),
                      size = as.numeric(sizes))) +
    # The constant size = 1 overrides the inherited size mapping for the line.
    geom_line(color = "#66c2a5", show.legend = FALSE, size = 1) +
    geom_point(color = "#66c2a5") +
    scale_size_continuous(breaks = c(25000, 50000, 200000),
                          labels = c("25,000", "50,000", "200,000")) +
    # title.hjust is the guide_legend() argument that centres the title.
    guides(size = guide_legend(title.position = "top", title.hjust = 0.5)) +
    scale_y_continuous(labels = percent, limits = c(lim_one, lim_two)) +
    labs(x = "",
         y = title,
         fill = "Number of voters",
         size = "Number of voters") +
    scale_x_continuous(breaks = c(0, .5, 1.0, 1.5, 2),
                       labels = c("0", "\u2265 .5 miles", "\u2265 1 miles",
                                  "\u2265 1.5 miles", "\u2265 2 miles"))
}

# Stack the three default-theme conditional-distance panels (effect estimate,
# rural share, white share) vertically and put a shared size legend
# underneath.
#
# Args:
#   dist: list with elements `1`, `2` and `3`, as returned by
#         conditional_distance_replication_windows().
# Returns: a cowplot grid object.
combine_plots_conditional_distance_default <- function(dist) {

  to_plot <- dist$`1`
  to_plot2 <- dist$`2`
  to_plot3 <- dist$`3`

  p1 <- plot_distance_condition_one_default(
    to_plot,
    title = "Difference in vote rate\n between farther and closer \n block faces")
  p2 <- plot_distance_condition_two_default(
    to_plot2,
    title = "Percent of voters \nliving in rural area",
    lim_one = .05, lim_two = .20)
  p3 <- plot_distance_condition_two_default(
    to_plot3,
    title = "Percent of voters\n who are white",
    lim_one = .70, lim_two = .93)

  # Take the legend from p2 before it is removed from the panels themselves.
  legend <- get_legend(p2 + theme(legend.position = c(0.55, 0.8),
                                  legend.direction = "horizontal"))
  p2 <- p2 + theme(legend.position = "none", plot.margin = unit(c(.1, .1, 2.7, .1), "cm"))
  p3 <- p3 + theme(legend.position = "none", plot.margin = unit(c(2.7, .1, 1, .1), "cm"))
  p1 <- p1 + theme(plot.margin = unit(c(.1, .1, 2.7, .1), "cm"))

  g <- plot_grid(p1, p2, p3, align = "v", nrow = 3, rel_heights = c(1 / 2, 1 / 4, 1 / 4))
  # The legend row needs no alignment with the panel stack.
  p <- plot_grid(g, legend, nrow = 2, align = "none", rel_heights = c(1, .1))
  p
}




# For each threshold in seq(0, 2.0, .05), estimate the treatment effect on
# turnout among blocks whose mean change in distance is at least the
# threshold, and summarise the composition of the matching rows of df_two.
#
# Args:
#   df_one: voter-level data with `voted_2016`, `block_id`, `treatment`,
#           `change_in_distance`, plus whatever get_se_pe() needs.
#   df_two: data with `block_id`, `change_in_distance`, `num_people`,
#           `num_rural` and `num_white`.
# Returns: list with elements "1" (state, point_estimate, se), "2" (state,
#   rural share, sizes) and "3" (state, white share, sizes); `state` holds the
#   thresholds as character strings.
conditional_distance_replication_windows <- function(df_one, df_two) {
  voted_codes <- c('p')
  thresholds <- seq(0, 2.0, .05)

  # Voter-level data with a 0/1 turnout flag and the block mean of
  # change_in_distance attached to every row.
  voters <- df_one %>%
    mutate(voted = ifelse(voted_2016 %in% voted_codes, 1, 0)) %>%
    group_by(block_id) %>%
    mutate(extra = mean(change_in_distance, na.rm = TRUE))

  blocks <- df_two %>%
    group_by(block_id) %>%
    mutate(extra = mean(change_in_distance, na.rm = TRUE))

  per_threshold <- lapply(thresholds, function(w) {
    # Keep every row of any block that has a treatment row at or above w.
    kept_ids <- filter(voters, treatment == 'treatment' & extra >= w)$block_id
    fit <- get_se_pe(filter(voters, block_id %in% kept_ids))

    covered <- filter(blocks, extra >= w)
    people <- sum(covered$num_people)

    list(pe = fit$pe,
         tse = fit$tse,
         rural = sum(covered$num_rural) / people,
         people = people,
         white = sum(covered$num_white) / people)
  })

  # Concatenate one field across thresholds (NULL when there are none).
  gather <- function(field) {
    unlist(lapply(per_threshold, function(res) res[[field]]))
  }

  state <- toupper(thresholds)
  people <- gather("people")

  estimates <- tibble(state = state,
                      point_estimate = gather("pe"),
                      se = gather("tse"))
  rural_share <- tibble(state = state,
                        point_estimate = gather("rural"),
                        sizes = people)
  white_share <- tibble(state = state,
                        point_estimate = gather("white"),
                        sizes = people)

  list("1" = estimates, "2" = rural_share, "3" = white_share)
}
